
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import timedelta
import datetime as dt
from datetime import date
import seaborn as sns
import math
import random
import collections

import matplotlib.pylab as pl
import matplotlib.gridspec as gridspec
from matplotlib.ticker import FormatStrFormatter



##Clean Main file
main_data= pd.read_csv("../Data_all_with_County.csv")            
main_data['Account_Role__c']= main_data['Account_Role__c'].fillna("Student")
main_data= main_data.rename(columns={"collection_date": "Date"})
main_data['Date']= pd.to_datetime(main_data["Date"])
main_data= main_data[main_data['reason_for_poc_test']!= 'Reflex']
main_data.loc[main_data['test_result']== "Presumptive Positive", 'test_result']= "Positive"
main_data['poc_test_type']= (main_data['poc_test_type']).fillna(main_data['test_type'] + " test")
main_data['reason_for_poc_test']= (main_data['reason_for_poc_test']).fillna(main_data['test_type'] + " test reason")

data_biweekly= pd.DataFrame(main_data.groupby(['related_organization', 'poc_test_type','reason_for_poc_test', 'test_type', 'Account_Role__c', 'County', pd.Grouper(key='Date', freq='2W-Sun')])['test_result'].value_counts()).rename(columns={"count": "test_result_count"}).reset_index()

####################

##Clean consent file
consent= pd.read_excel('../2022-08-09 MA CTC consent demographics.xlsx', sheet_name= 1)
consent= consent.rename(columns= {'Date_of_Consent__pc': 'Date'})
consent.loc[consent['Date'] < "2021-09-05", "Date"]= "2021-09-05"
consent.loc[consent['Date'] > "2022-06-24", "Date"]= "2022-06-24"
consent['Date']= pd.to_datetime(consent['Date'])
consent.loc[consent['Rapid_Response_Client_Organization__r.Name'] == "Dorothy L. Beckwith Middle\xa0School", 'Rapid_Response_Client_Organization__r.Name']= "Dorothy L. Beckwith Middle"
consent_grouped= pd.DataFrame(consent.groupby(['Rapid_Response_Client_Organization__r.Name', pd.Grouper(key='Date', freq='W-Sun')]).agg({'Rapid_Response_Client_Organization__r.Name':'count', 'total_population':'last'})).rename(columns={"Rapid_Response_Client_Organization__r.Name": "consented_students"}).reset_index().rename(columns={"Rapid_Response_Client_Organization__r.Name": "related_organization"})
consent_grouped['consented_students_cumsum']= consent_grouped.groupby(['related_organization'])['consented_students'].cumsum()
consent_grouped= consent_grouped.rename(columns={"total_population":"school_population"})

##Adding School population where school population is null
consent_grouped.loc[consent_grouped['related_organization'] == "Dawson Elementary School", 'school_population']= 499
consent_grouped.loc[consent_grouped['related_organization'] == "Ashby Elementary School", 'school_population']= 139
consent_grouped.loc[consent_grouped['related_organization'] == "Devereux Advanced Behavioral Health School", 'school_population']= 73
consent_grouped.loc[consent_grouped['related_organization'] == "Leroy E Mayo Elementary School", 'school_population']= 491
consent_grouped.loc[consent_grouped['related_organization'] == "Martha's Vineyard Charter", 'school_population']= 181
consent_grouped.loc[consent_grouped['related_organization'] == "Mountview Middle School", 'school_population']= 722
consent_grouped.loc[consent_grouped['related_organization'] == "Narragansett Middle and High School", 'school_population']= 359
consent_grouped.loc[consent_grouped['related_organization'] == "Nashoba Learning Group", 'school_population']= 129
consent_grouped.loc[consent_grouped['related_organization'] == "New Braintree Grade School", 'school_population']= 38
consent_grouped.loc[consent_grouped['related_organization'] == "Nissitissit Middle School", 'school_population']= 481
consent_grouped.loc[consent_grouped['related_organization'] == "North Adams Public Schools", 'school_population']= 1208
consent_grouped.loc[consent_grouped['related_organization'] == "North Middlesex Regional High School", 'school_population']= 779
consent_grouped.loc[consent_grouped['related_organization'] == "North River Collaborative", 'school_population']= 70
consent_grouped.loc[consent_grouped['related_organization'] == "Park Street School - Preschool", 'school_population']= 199
consent_grouped.loc[consent_grouped['related_organization'] == "Pioneer Charter School of Science", 'school_population']= 783
consent_grouped.loc[consent_grouped['related_organization'] == "Promise College and Career Academy", 'school_population']= 38
consent_grouped.loc[consent_grouped['related_organization'] == "Southeastern Regional Vocational Technical High School", 'school_population']= 1558
consent_grouped.loc[consent_grouped['related_organization'] == "St Leo", 'school_population']= 233
consent_grouped.loc[consent_grouped['related_organization'] == "St Mary Of Annunciation", 'school_population']= 307
consent_grouped.loc[consent_grouped['related_organization'] == "St. Mary's High School", 'school_population']= 517
consent_grouped.loc[consent_grouped['related_organization'] == "Varnum Brook Elementary School", 'school_population']= 633
consent_grouped.loc[consent_grouped['related_organization'] == "William J. Ostiguy High School", 'school_population']= 75
consent_grouped.loc[consent_grouped['related_organization'] == "Community Day Charter Public School - Gateway (District)", 'school_population']= 401
consent_grouped.loc[consent_grouped['related_organization'] == "Dighton-Rehoboth", 'school_population']= 2477
consent_grouped.loc[consent_grouped['related_organization'] == "Hanover Public School District", 'school_population']= 2579
consent_grouped.loc[consent_grouped['related_organization'] == "KIPP Academy Boston Charter School (District)", 'school_population']= 577
consent_grouped.loc[consent_grouped['related_organization'] == "MATCH Charter Public School (District)", 'school_population']= 1186
consent_grouped.loc[consent_grouped['related_organization'] == "KIPP Academy Boston Charter School (District)", 'school_population']= 1186
consent_grouped.loc[consent_grouped['related_organization'] == "Squannacook Elementary School", 'school_population']= 47

consent_grouped= consent_grouped[consent_grouped['consent_population']!= 0]
consent_grouped['consent_rate_cumsum']= consent_grouped['consented_students_cumsum'] / consent_grouped['school_population'] 

######################
##Add town

town= pd.read_excel('../towns.xlsx')

consent_grouped['town']= " "
consent_grouped['County']= " "


for i in(range(len(consent_grouped))):
    for j in range(len(town)):
        if consent_grouped.loc[i, 'related_organization'] == town.loc[j, 'District Name']:
            consent_grouped.loc[i, 'town'] = town.loc[j, 'Town/City']
            consent_grouped.loc[i, 'County'] = town.loc[j, 'County']

##Adding town where town is null

consent_grouped.loc[consent_grouped['related_organization']== 'Lawrence Catholic Academy', 'town']= 'Lawrence'
consent_grouped.loc[consent_grouped['related_organization']== 'La Familia Dual Language School', 'town']= 'Worcester'
consent_grouped.loc[consent_grouped['related_organization']== 'Fuller Meadow Elementary School', 'town']= 'Middleton'
consent_grouped.loc[consent_grouped['related_organization']== 'Joseph Case High School', 'town']= 'Swansea'
consent_grouped.loc[consent_grouped['related_organization']== 'Quashnet Elementary School', 'town']= 'Mashpee'
consent_grouped.loc[consent_grouped['related_organization']== 'Saint Joseph Prep High School', 'town']= 'Boston'
consent_grouped.loc[consent_grouped['related_organization']== 'The Campus School at Boston College', 'town']= 'Chestnut Hill'
consent_grouped.loc[consent_grouped['related_organization']== 'Jordan/Jackson Elementary', 'town']= 'Mansfield'
consent_grouped.loc[consent_grouped['related_organization']== 'Easton Middle School', 'town']= 'North Easton'
consent_grouped.loc[consent_grouped['related_organization']== 'Walpole High School', 'town']= 'Walpole'
consent_grouped.loc[consent_grouped['related_organization']== 'Grafton Middle School', 'town']= 'Grafton'

consent_grouped= consent_grouped[['town', 'County','related_organization', 'Date', 'consented_students', 
                                              'school_population', 'consented_students_cumsum', 
                                              'consent_rate_cumsum']]


consent_grouped_town= consent_grouped.groupby(['Date','related_organization', 'town', 'County']).agg({'consented_students_cumsum': 'last', 'school_population': 'first'}).reset_index()
consent_grouped_town= consent_grouped_town.groupby(['Date','town', 'County']).agg({'consented_students_cumsum': 'sum', 'school_population': 'sum'}).reset_index().rename(columns= {'consented_students_cumsum': 'town_consented_students', 'school_population': 'town_student_population'})


data_all= pd.merge(data_biweekly, consent_grouped_town, on= ['Date', 'related_organization', 'County'], how= 'left')

######################

#read and clean  covid files
covid= pd.read_csv("../covid-19-biweekly_county_wise.csv")
covid['Date']= pd.to_datetime(covid['Date'])

##merge main file with covid and vaccine
data_all= pd.merge(data_all, covid, on=['Date', 'County'])
data_all= data_all.rename(columns= {'Population': 'county_population'})


data_all['one_dose_vaccinated_porportion']= data_all['Individuals with at least one dose']/data_all['county_population']
data_all['fully_vaccinated_porportion']= data_all['Fully vaccinated individuals']/data_all['county_population']
data_all['partially_vaccinated_porportion']= data_all['Partially vaccinated individuals']/data_all['county_population']

data_all['is_pool']= 0
data_all.loc[data_all['test_type']== "Pool", "is_pool"]= 1

data_all['is_group']= 0
data_all.loc[data_all['test_type']== "Group", "is_group"]= 1

###########################

##Including public Schools
public_schools= pd.read_csv('../All_publics.csv')
pub_schools=list(public_schools['related_organization'].unique()) 

data_all['school_type']= "private"
data_all.loc[data_all['related_organization'].isin(pub_schools), 'school_type']= 'public'

###########################
##District Level Variables

district_vars= pd.read_excel('../public_schools.xlsx')

charter= district_vars[district_vars['Charter']== 'Yes']
charter_schools= charter['School Name'].unique()

data_all['charter']= 0
data_all.loc[data_all['related_organization'].isin(charter_schools), 'charter']= 1

extra_charter= data_all[data_all.apply(lambda x: 'charter' in x['related_organization'].lower(), axis = 1)]
extra_charter_school= extra_charter['charter']
extra_charter_school= extra_charter_school[extra_charter_school!=0]
extra_charter_school_indx= list(extra_charter_school.index)

data_all.loc[(data_all.index.isin(extra_charter_school_indx)) & (data_all['charter']== 0), 'charter']= 1

title1= district_vars[(district_vars['Title I School*']== 'Yes')| (district_vars['Title 1 School Wide*']== 'Yes')]
title1_schools= title1['School Name'].unique()

data_all['title1']= 0
data_all.loc[data_all['related_organization'].isin(title1_schools), 'title1']= 1

rural_list= ['Rural: Distant', 'Rural: Fringe']
rural= district_vars[district_vars['Locale*'].isin(rural_list)]
rural_schools= rural['School Name'].unique()

data_all['rural']= 0
data_all.loc[data_all['related_organization'].isin(rural_schools), 'rural']= 1

###################################

##Adding Demographics

public_demographics= pd.read_csv('../All_publics.csv')
private_demographics= pd.read_excel('../private_schools.xlsx')

data_all['white']= 0
data_all['hispanic']= 0
data_all['black']= 0
data_all['other']= 0

race_cols= ['white', 'hispanic', 'black', 'other']

for i in range(len(data_all)):
    for j in range(len(public_demographics)):
        if (data_all.loc[i, 'related_organization']== public_demographics.loc[j, 'related_organiation']):
            for k in race_cols:
                if data_all.loc[i, k]== 0:
                    data_all.loc[i, 'white']= public_demographics.loc[j, 'White']
                    data_all.loc[i, 'hispanic']= public_demographics.loc[j, 'Hispanic']
                    data_all.loc[i, 'black']= public_demographics.loc[j, 'African American']
                    data_all.loc[i, 'other']= public_demographics.loc[j, 'Other']
                    


for i in range(len(data_all)):
    for j in range(len(private_demographics)):
        if (data_all.loc[i, 'related_organization']== private_demographics.loc[j, 'PSS_INST']):
            data_all.loc[i, 'white']= private_demographics.loc[j, 'PSS_RACE_W']
            data_all.loc[i, 'hispanic']= private_demographics.loc[j, 'PSS_RACE_H']
            data_all.loc[i, 'black']= private_demographics.loc[j, 'PSS_RACE_B']
            data_all.loc[i, 'other']= private_demographics.loc[j, 'PSS_RACE_O']
            

########################

school_level= pd.read_csv('../school_level.csv')
school_level_dict= dict()

for i in range(len(school_level)):
    school_level_dict[school_level.loc[i, 'name']]= school_level.loc[i, 'level']
    
data_all['school_level']= "x"

for i in range(len(data_all)):
    for k, v in school_level_dict.items():
        if (data_all.loc[i, 'related_organization'] == k) & (data_all.loc[i, 'school_level'] == "x"):
            data_all.loc[i, 'school_level'] = v     


########################

##Adding Vaccine with age group

vaccine_age= pd.read_csv("../biweekly_town_age_vaccine_data_oct10.csv")
vaccine_age['Date']= pd.to_datetime(vaccine_age['Date'])
vaccine_age['town']= (vaccine_age['town'].str.lower())


vaccine_elem= vaccine_age[(vaccine_age['Age Group']== '5-11 Years')]
vaccine_elem= vaccine_elem[['Date', 'town', 'Individuals with at least one dose per capita', 'Fully vaccinated individuals per capita', 'Partially vaccinated individuals per capita']]

vaccine_middle= vaccine_age[(vaccine_age['Age Group']== '12-15 Years')]
vaccine_middle= vaccine_middle[['Date', 'town', 'Individuals with at least one dose per capita', 'Fully vaccinated individuals per capita', 'Partially vaccinated individuals per capita']]

vaccine_high= vaccine_age[(vaccine_age['Age Group']== '16-19 Years')]
vaccine_high= vaccine_high[['Date', 'town', 'Individuals with at least one dose per capita', 'Fully vaccinated individuals per capita', 'Partially vaccinated individuals per capita']]

vaccine_elem_midd= vaccine_age[(vaccine_age['Age Group']== '5-11 Years') | (vaccine_age['Age Group']== '12-15 Years')]
vaccine_elem_midd= vaccine_elem_midd.groupby(['Date', 'town', 'Age Group'])['Individuals with at least one dose per capita', 'Fully vaccinated individuals per capita', 'Partially vaccinated individuals per capita'].last().reset_index()
vaccine_elem_midd= vaccine_elem_midd.groupby(['Date', 'town'])['Individuals with at least one dose per capita', 'Fully vaccinated individuals per capita', 'Partially vaccinated individuals per capita'].mean().reset_index()

vaccine_midd_high= vaccine_age[(vaccine_age['Age Group']== '12-15 Years') | (vaccine_age['Age Group']== '16-19 Years')]
vaccine_midd_high= vaccine_midd_high.groupby(['Date', 'town', 'Age Group'])['Individuals with at least one dose per capita', 'Fully vaccinated individuals per capita', 'Partially vaccinated individuals per capita'].last().reset_index()
vaccine_midd_high= vaccine_midd_high.groupby(['Date', 'town'])['Individuals with at least one dose per capita', 'Fully vaccinated individuals per capita', 'Partially vaccinated individuals per capita'].mean().reset_index()


vaccine_all= vaccine_age[(vaccine_age['Age Group']== '5-11 Years') | (vaccine_age['Age Group']== '12-15 Years') | (vaccine_age['Age Group']== '16-19 Years')]
vaccine_all= vaccine_all.groupby(['Date', 'town', 'Age Group'])['Individuals with at least one dose per capita', 'Fully vaccinated individuals per capita', 'Partially vaccinated individuals per capita'].last().reset_index()
vaccine_all= vaccine_all.groupby(['Date', 'town'])['Individuals with at least one dose per capita', 'Fully vaccinated individuals per capita', 'Partially vaccinated individuals per capita'].mean().reset_index()


data_all= pd.merge(data_all, vaccine_elem, on= ['Date', 'town'], how= 'left')
data_all= data_all.rename(columns= {'Individuals with at least one dose per capita': 'one_dose_elementary_porportion', 'Fully vaccinated individuals per capita': 'Full_elementary_porportion', 'Partially vaccinated individuals per capita': 'Partiall_elementary_porportion'})

data_all= pd.merge(data_all, vaccine_middle, on= ['Date', 'town'], how= 'left')
data_all= data_all.rename(columns= {'Individuals with at least one dose per capita': 'one_dose_middle_porportion', 'Fully vaccinated individuals per capita': 'Full_middle_porportion', 'Partially vaccinated individuals per capita': 'Partiall_middle_porportion'})

data_all= pd.merge(data_all, vaccine_high, on= ['Date', 'town'], how= 'left')
data_all= data_all.rename(columns= {'Individuals with at least one dose per capita': 'one_dose_high_porportion', 'Fully vaccinated individuals per capita': 'Full_high_porportion', 'Partially vaccinated individuals per capita': 'Partiall_high_porportion'})

data_all= pd.merge(data_all, vaccine_elem_midd, on= ['Date', 'town'], how= 'left')
data_all= data_all.rename(columns= {'Individuals with at least one dose per capita': 'one_dose_em_porportion', 'Fully vaccinated individuals per capita': 'Full_em_porportion', 'Partially vaccinated individuals per capita': 'Partiall_em_porportion'})

data_all= pd.merge(data_all, vaccine_midd_high, on= ['Date', 'town'], how= 'left')
data_all= data_all.rename(columns= {'Individuals with at least one dose per capita': 'one_dose_mh_porportion', 'Fully vaccinated individuals per capita': 'Full_mh_porportion', 'Partially vaccinated individuals per capita': 'Partiall_mh_porportion'})

data_all= pd.merge(data_all, vaccine_all, on= ['Date', 'town'], how= 'left')
data_all= data_all.rename(columns= {'Individuals with at least one dose per capita': 'one_dose_all_porportion', 'Fully vaccinated individuals per capita': 'Full_all_porportion', 'Partially vaccinated individuals per capita': 'Partiall_all_porportion'})

data_all.loc[data_all['school_level']== 'elementary/middle', 'Full_district_porportion']= data_all['Full_em_porportion']
data_all.loc[data_all['school_level']== 'middle/high', 'Full_district_porportion']= data_all['Full_mh_porportion']
data_all.loc[data_all['school_level']== 'middle', 'Full_district_porportion']= data_all['Full_middle_porportion']
data_all.loc[data_all['school_level']== 'high', 'Full_district_porportion']= data_all['Full_high_porportion']
data_all.loc[data_all['school_level']== 'elementary', 'Full_district_porportion']= data_all['Full_elementary_porportion']

###########################

#defining lo vs high vaccinated schools

data_all['is_low']= 0
data_all.loc[(data_all['school_level'] == 'elementary') & (data_all['one_dose_elementary_porportion'] <= 0.5) & (data_all['Date'] >= "2022-01-01"), 'is_low']= 1
data_all.loc[(data_all['school_level'] == 'high') &(data_all['Full_high_porportion'] <= 0.5) & (data_all['Date'] >= "2022-01-01"), 'is_low']= 1

##########################

#calculatin positivity rate

all_tests= data_all.groupby(['related_organization', 'Date'])['test_result_count'].sum().reset_index()

pos_tests= data_all[data_all['test_result']== 'Positive']
pos_tests= pos_tests.groupby('related_organization', 'Date')['test_result_count'].sum().reset_index()

all_tests= pd.merge(all_tests, pos_tests, on=['related_organization', 'Date'], how= 'left')

all_tests['positivity_rate']= all_tests['test_result_count_y'] / all_tests['test_result_count_x']
all_tests= all_tests.drop(columns= ['test_result_count_x', 'test_result_count_y'])
all_tests= all_tests.fillna(0)

data_all= pd.merge(data_all, all_tests, on= ['related_organization', 'Date'])

data_all.to_csv("../data_all.csv", index= False)

#############################

##Plot Fig 2

all_tests= data_all.groupby('Date')['test_result_count'].sum().reset_index()
all_tests= all_tests.set_index('Date')

all_pos_tests= all_tests[all_tests['test_result']== 'Positive']
all_pos_tests= all_pos_tests.groupby('Date')['test_result_count'].sum().reset_index()
all_pos_tests= all_pos_tests.set_index('Date')

#Test to Staye Positivity rate
tts_df = data_all[data_all['reason_for_poc_test']== 'Test to Stay']
tts_tests= tts_df.groupby(['Date', 'related_organization'])['test_result_count'].sum().reset_index()

tts_pos= tts_df[tts_df['test_result']== 'Positive']
tts_pos= tts_pos.groupby(['Date', 'related_organization'])['test_result_count'].sum().reset_index()

tts_tests= pd.merge(tts_tests, tts_pos, on= ['Date', 'related_organization'], how= 'left').fillna(0)
tts_tests['positivity_rate']= tts_tests['test_result_count_y'] / tts_tests['test_result_count_x']
tts_tests= tts_tests[['Date', 'related_organization', 'positivity_rate']]
tts_tests= tts_tests.set_index('Date')
##################

#Surveillance Test Positivity rate

Surveillance= ['Pool test reason', 'Group test reason', 'Screening']

surv_df = data_all[data_all['reason_for_poc_test'].isin(Surveillance)]

surv_tests= surv_df.groupby(['Date', 'related_organization'])['test_result_count'].sum().reset_index()

surv_pos= surv_tests[surv_tests['test_result']== 'Positive']
surv_pos= surv_pos.groupby(['Date', 'related_organization'])['test_result_count'].sum().reset_index()

surv_tests= pd.merge(surv_tests, surv_pos, on= ['Date', 'related_organization'], how= 'left').fillna(0)
surv_tests['positivity_rate']= surv_tests['test_result_count_y'] / surv_tests['test_result_count_x']
surv_tests= surv_tests[['Date', 'related_organization', 'positivity_rate']]
surv_tests= surv_tests.set_index('Date')

##################
#Vaccination_rate
Barnstable= data_all[data_all['County']== "Barnstable"]
Barnstable= Barnstable.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Berkshire= data_all[data_all['County']== "Berkshire"]
Berkshire= Berkshire.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Bristol= data_all[data_all['County']== "Bristol"]
Bristol= Bristol.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Dukes= data_all[data_all['County']== "Dukes"]
Dukes= Dukes.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Essex= data_all[data_all['County']== "Essex"]
Essex= Essex.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Franklin= data_all[data_all['County']== "Franklin"]
Franklin= Franklin.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Hampden= data_all[data_all['County']== "Hampden"]
Hampden= Hampden.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Hampshire= data_all[data_all['County']== "Hampshire"]
Hampshire= Hampshire.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Middlesex= data_all[data_all['County']== "Middlesex"]
Middlesex= Middlesex.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Nantucket= data_all[data_all['County']== "Nantucket"]
Nantucket= Nantucket.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Norfolk= data_all[data_all['County']== "Norfolk"]
Norfolk= Norfolk.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Plymouth= data_all[data_all['County']== "Plymouth"]
Plymouth= Plymouth.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Suffolk= data_all[data_all['County']== "Suffolk"]
Suffolk= Suffolk.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()

Worcester= data_all[data_all['County']== "Worcester"]
Worcester= Worcester.groupby('Date')['fully_vaccinated_porportion'].mean().reset_index()


all_counties_vacc= pd.merge(Barnstable, Berkshire, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Bristol, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Dukes, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Essex, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Franklin, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Hampden, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Hampshire, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Middlesex, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Nantucket, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Norfolk, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Plymouth, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Suffolk, on='Date')
all_counties_vacc= pd.merge(all_counties_vacc, Worcester, on='Date')
all_counties_vacc['mean_vacc']= all_counties_vacc.mean(axis=1)
all_counties_vacc= all_counties_vacc[['Date', 'mean_vacc']]
all_counties_vacc= all_counties_vacc.set_index('Date')


##mean cases
Barnstable= data_all[data_all['County']== "Barnstable"]
Barnstable['case_per_100']= (Barnstable['Total Confirmed Cases'] / Barnstable['county_population']) * 100000
Barnstable= Barnstable.groupby('Date')['case_per_100'].mean().reset_index()


Berkshire= data_all[data_all['County']== "Berkshire"]
Berkshire['case_per_100']= (Berkshire['Total Confirmed Cases'] / Berkshire['county_population']) * 100000
Berkshire= Berkshire.groupby('Date')['case_per_100'].mean().reset_index()


Bristol= data_all[data_all['County']== "Bristol"]
Bristol['case_per_100']= (Bristol['Total Confirmed Cases'] / Bristol['county_population']) * 100000
Bristol= Bristol.groupby('Date')['case_per_100'].mean().reset_index()

Dukes= data_all[data_all['County']== "Dukes"]
Dukes['case_per_100']= (Dukes['Total Confirmed Cases'] / Dukes['county_population']) * 100000
Dukes= Dukes.groupby('Date')['case_per_100'].mean().reset_index()

Essex= data_all[data_all['County']== "Essex"]
Essex['case_per_100']= (Essex['Total Confirmed Cases'] / Essex['county_population']) * 100000
Essex= Essex.groupby('Date')['case_per_100'].mean().reset_index()

Franklin= data_all[data_all['County']== "Franklin"]
Franklin['case_per_100']= (Franklin['Total Confirmed Cases'] / Franklin['county_population']) * 100000
Franklin= Franklin.groupby('Date')['case_per_100'].mean().reset_index()

Hampden= data_all[data_all['County']== "Hampden"]
Hampden['case_per_100']= (Hampden['Total Confirmed Cases'] / Hampden['county_population']) * 100000
Hampden= Hampden.groupby('Date')['case_per_100'].mean().reset_index()

Hampshire= data_all[data_all['County']== "Hampshire"]
Hampshire['case_per_100']= (Hampshire['Total Confirmed Cases'] / Hampshire['county_population']) * 100000
Hampshire= Hampshire.groupby('Date')['case_per_100'].mean().reset_index()

Middlesex= data_all[data_all['County']== "Middlesex"]
Middlesex['case_per_100']= (Middlesex['Total Confirmed Cases'] / Middlesex['county_population']) * 100000
Middlesex= Middlesex.groupby('Date')['case_per_100'].mean().reset_index()

Nantucket= data_all[data_all['County']== "Nantucket"]
Nantucket['case_per_100']= (Nantucket['Total Confirmed Cases'] / Nantucket['county_population']) * 100000
Nantucket= Nantucket.groupby('Date')['case_per_100'].mean().reset_index()

Norfolk= data_all[data_all['County']== "Norfolk"]
Norfolk['case_per_100']= (Norfolk['Total Confirmed Cases'] / Norfolk['county_population']) * 100000
Norfolk= Norfolk.groupby('Date')['case_per_100'].mean().reset_index()

Plymouth= data_all[data_all['County']== "Plymouth"]
Plymouth['case_per_100']= (Plymouth['Total Confirmed Cases'] / Plymouth['county_population']) * 100000
Plymouth= Plymouth.groupby('Date')['case_per_100'].mean().reset_index()

Suffolk= data_all[data_all['County']== "Suffolk"]
Suffolk['case_per_100']= (Suffolk['Total Confirmed Cases'] / Suffolk['county_population']) * 100000
Suffolk= Suffolk.groupby('Date')['case_per_100'].mean().reset_index()

Worcester= data_all[data_all['County']== "Worcester"]
Worcester['case_per_100']= (Worcester['Total Confirmed Cases'] / Worcester['county_population']) * 100000
Worcester= Worcester.groupby('Date')['case_per_100'].mean().reset_index()


all_counties= pd.merge(Barnstable, Berkshire, on='Date')
all_counties= pd.merge(all_counties, Bristol, on='Date')
all_counties= pd.merge(all_counties, Dukes, on='Date')
all_counties= pd.merge(all_counties, Essex, on='Date')
all_counties= pd.merge(all_counties, Franklin, on='Date')
all_counties= pd.merge(all_counties, Hampden, on='Date')
all_counties= pd.merge(all_counties, Hampshire, on='Date')
all_counties= pd.merge(all_counties, Middlesex, on='Date')
all_counties= pd.merge(all_counties, Nantucket, on='Date')
all_counties= pd.merge(all_counties, Norfolk, on='Date')
all_counties= pd.merge(all_counties, Plymouth, on='Date')
all_counties= pd.merge(all_counties, Suffolk, on='Date')
all_counties= pd.merge(all_counties, Worcester, on='Date')
all_counties['mean_cases']= all_counties.mean(axis=1)
all_counties= all_counties[['Date', 'mean_cases']]
all_counties= all_counties.set_index('Date')

##Plotting

fig = plt.figure(figsize=(28, 25), dpi=150)

plt.rcParams["font.family"] = "Arial"

gs = gridspec.GridSpec(3,2)

ax1=fig.add_subplot(gs[0,0])
ax2=fig.add_subplot(gs[0,1])
ax3=fig.add_subplot(gs[1,0])
ax4=fig.add_subplot(gs[1,1])
ax5=fig.add_subplot(gs[2,0])
ax6=fig.add_subplot(gs[2,1])


ax1.set_title('\nA- All Positive tests\n', size= 30)
ax1.set(ylabel="Number of Positive Tests\n")
ax1.yaxis.get_label().set_fontsize(30)
ax1.xaxis.get_label().set_fontsize(30)
ax2.set_title('\nB- All tests Performed\n', size= 30)
ax2.set(ylabel="Number of Tests\n")
ax2.yaxis.get_label().set_fontsize(30)
ax2.xaxis.get_label().set_fontsize(30)
ax3.set_title('\n\nC- Test to Stay Positivity Rate\n', size= 30)
ax3.set(ylabel="Positivity Rate (%)\n")
ax3.yaxis.get_label().set_fontsize(30)
ax3.xaxis.get_label().set_fontsize(30)
ax3.yaxis.set_major_formatter(FormatStrFormatter('%.0f'))
ax4.set_title('\n\nD- Surveillance Positivity Rate\n', size= 30)
ax4.set(ylabel="Positivity Rate (%)\n")
ax4.yaxis.get_label().set_fontsize(30)
ax4.xaxis.get_label().set_fontsize(30)
ax5.set_title('\n\nE- County Vaccination Rate (per 100k Population)\n', size= 30)
ax5.set(ylabel="Vaccination Rate (%)\n")
ax5.yaxis.get_label().set_fontsize(30)
ax5.xaxis.get_label().set_fontsize(30)
ax6.set_title('\n\nF- COVID Incidence Rate (per 100k Population)\n', size= 30)
ax6.set(ylabel="Incidence Rate\n")
ax6.yaxis.get_label().set_fontsize(30)
ax6.xaxis.get_label().set_fontsize(30)

all_pos_tests['test_result_count'].plot(ax = ax1, style= "-.*", linewidth=5, color= "#0000FF", fontsize=30, label= "All Positive tests")
(all_tests['test_result_count']).plot(ax = ax2, style= "-.*", linewidth=5, color= "#FF0000", fontsize=30, label= "All Tests Performed")
(tts_tests['positivity_rate']* 100).plot(ax = ax3, style= "-.*", linewidth=5, color= "#008000", fontsize=30, label= "Test to Stay Positivity Rate")
(surv_tests['positivity_rate']* 100).plot(ax = ax4, style= "-.*", linewidth=5, color= "#DAA520", fontsize=30, label= "Surveillance Testing Positivity Rate")
(all_counties_vacc['mean_vacc']*100).plot(ax = ax5, style= "-.*", linewidth=5, color= "#380282", fontsize=30, label= "Mean Vaccination Rate per 100k")
all_counties['mean_cases'].plot(ax = ax6, style= "-.*", linewidth=5, color= "#FF81C0", fontsize=30, label= "Mean New Cases per 100k")

txt = "\n\n\n\n\nA- Sum of all positive tests by all tests; B- Number of all tests that performed; C- Sum of all positive test to stay tests by all test to stay test results;\nD- Sum of all positive surveillance tests by all surveillance test results\n "\
    
fig.tight_layout(pad=3.0)
plt.figtext(0.5, 0, txt, wrap=True, fontsize= 30, horizontalalignment= 'center', verticalalignment = 'center')

plt.show()

#############################


tts_df= data_all[data_all['reason_for_poc_test']== 'Test to Stay']
surv_df= data_all[data_all['reason_for_poc_test'].isin(Surveillance)]

x= list(surv_df['related_organization'].unique())
y= list(tts_df['related_organization'].unique())
	
##Propensity score matching

propensity_match_treat= data_all[data_all['reason_for_poc_test'].isin(Surveillance)]

propensity_match_treat['surv_policy_go_live_date']= pd.to_datetime(propensity_match_treat['surv_policy_go_live_date'])
propensity_match_treat= propensity_match_treat[propensity_match_treat['surv_policy_go_live_date']== "2021-09-19"]
propensity_match_treat['group']= 1

propensity_match_treat= propensity_match_treat[propensity_match_treat['related_organization'].isin(y)]

trt_school= list(propensity_match_treat['related_organization'].unique())


propensity_match_cntrl= data_all[data_all['surv_policy_go_live_date']> "2021-09-19"]
propensity_match_cntrl['group']= 0


propensity_match_data= propensity_match_treat.append(propensity_match_cntrl)
propensity_match_data['surv_policy_go_live_date']= pd.to_datetime(propensity_match_data['surv_policy_go_live_date'])


propensity_match_data['group']= 0
propensity_match_data.loc[propensity_match_data['surv_policy_go_live_date']==  pd.to_datetime("2021-09-19"), 'group']= 1

##Save propensity data to run propensity score matching in STATA

propensity_match_data.to_csv("../propensity_match_data.csv", index= False)

#########################





